No description has been provided for this image
In [44]:
# IMPORT LIBRARIES
In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import calendar
import plotly.graph_objects as go


import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
In [46]:
#!pip install plotly
In [47]:
data = pd.read_csv(r"C:\Users\khare\OneDrive\Desktop\python projects\Unemployment_Rate_upto_11_2020.csv")
data
Out[47]:
Region Date Frequency Estimated Unemployment Rate (%) Estimated Employed Estimated Labour Participation Rate (%) Region.1 longitude latitude
0 Andhra Pradesh 31-01-2020 M 5.48 16635535 41.02 South 15.9129 79.740
1 Andhra Pradesh 29-02-2020 M 5.83 16545652 40.90 South 15.9129 79.740
2 Andhra Pradesh 31-03-2020 M 5.79 15881197 39.18 South 15.9129 79.740
3 Andhra Pradesh 30-04-2020 M 20.51 11336911 33.10 South 15.9129 79.740
4 Andhra Pradesh 31-05-2020 M 17.43 12988845 36.46 South 15.9129 79.740
... ... ... ... ... ... ... ... ... ...
262 West Bengal 30-06-2020 M 7.29 30726310 40.39 East 22.9868 87.855
263 West Bengal 31-07-2020 M 6.83 35372506 46.17 East 22.9868 87.855
264 West Bengal 31-08-2020 M 14.87 33298644 47.48 East 22.9868 87.855
265 West Bengal 30-09-2020 M 9.35 35707239 47.73 East 22.9868 87.855
266 West Bengal 31-10-2020 M 9.98 33962549 45.63 East 22.9868 87.855

267 rows × 9 columns

In [48]:
#updating the column names
data.columns=["State","Date","Frequency","Estimated unemployment rate","Estimated employed","Estimated labour participation rate","Region","Longitude","Latitude"]
In [49]:
data.tail()
Out[49]:
State Date Frequency Estimated unemployment rate Estimated employed Estimated labour participation rate Region Longitude Latitude
262 West Bengal 30-06-2020 M 7.29 30726310 40.39 East 22.9868 87.855
263 West Bengal 31-07-2020 M 6.83 35372506 46.17 East 22.9868 87.855
264 West Bengal 31-08-2020 M 14.87 33298644 47.48 East 22.9868 87.855
265 West Bengal 30-09-2020 M 9.35 35707239 47.73 East 22.9868 87.855
266 West Bengal 31-10-2020 M 9.98 33962549 45.63 East 22.9868 87.855
In [50]:
data.shape
Out[50]:
(267, 9)
In [51]:
data.columns
Out[51]:
Index(['State', 'Date', 'Frequency', 'Estimated unemployment rate',
       'Estimated employed', 'Estimated labour participation rate', 'Region',
       'Longitude', 'Latitude'],
      dtype='object')
In [52]:
data.describe()
Out[52]:
Estimated unemployment rate Estimated employed Estimated labour participation rate Longitude Latitude
count 267.000000 2.670000e+02 267.000000 267.000000 267.000000
mean 12.236929 1.396211e+07 41.681573 22.826048 80.532425
std 10.803283 1.336632e+07 7.845419 6.270731 5.831738
min 0.500000 1.175420e+05 16.770000 10.850500 71.192400
25% 4.845000 2.838930e+06 37.265000 18.112400 76.085600
50% 9.650000 9.732417e+06 40.390000 23.610200 79.019300
75% 16.755000 2.187869e+07 44.055000 27.278400 85.279900
max 75.850000 5.943376e+07 69.690000 33.778200 92.937600
In [53]:
data.isnull().sum()
Out[53]:
State                                  0
Date                                   0
Frequency                              0
Estimated unemployment rate            0
Estimated employed                     0
Estimated labour participation rate    0
Region                                 0
Longitude                              0
Latitude                               0
dtype: int64
In [54]:
data.duplicated().any()
Out[54]:
np.False_
In [55]:
data.State.value_counts()
Out[55]:
State
Andhra Pradesh      10
Assam               10
Bihar               10
Chhattisgarh        10
Delhi               10
Goa                 10
Gujarat             10
Haryana             10
Himachal Pradesh    10
Jharkhand           10
Karnataka           10
Rajasthan           10
Kerala              10
Madhya Pradesh      10
Maharashtra         10
Meghalaya           10
Odisha              10
Puducherry          10
Punjab              10
Uttarakhand         10
Tamil Nadu          10
Telangana           10
Tripura             10
West Bengal         10
Uttar Pradesh       10
Jammu & Kashmir      9
Sikkim               8
Name: count, dtype: int64
In [56]:
data.Region.value_counts()
Out[56]:
Region
North        79
South        60
West         50
East         40
Northeast    38
Name: count, dtype: int64
In [57]:
# # Converting "Date" column to Datetime format
# Clean column names
data.columns = data.columns.str.strip().str.lower()

# Convert to datetime
data['date'] = pd.to_datetime(data['date'], dayfirst=True, errors='coerce')

# Convert categorical columns
data['frequency'] = data['frequency'].astype('category')
data['region'] = data['region'].astype('category')

📅 Extracting Month From Date Attribute

In [58]:
data['Month']= data['date'].dt.month
In [59]:
#converting 'month' to integer format
data['Month_int'] = data['Month'].apply(lambda x: int(x))

# Mapping integer month values to abbreviated month names
data['Month_name'] = data['Month_int'].apply(lambda x: calendar.month_abbr[x])
In [60]:
#Dropping the original 'Month' column
data.drop(columns='Month', inplace=True)
In [61]:
data['Month'] = data['Month_int'].apply(lambda x: calendar.month_abbr[x])
In [62]:
data.tail()
Out[62]:
state date frequency estimated unemployment rate estimated employed estimated labour participation rate region longitude latitude Month_int Month_name Month
262 West Bengal 2020-06-30 M 7.29 30726310 40.39 East 22.9868 87.855 6 Jun Jun
263 West Bengal 2020-07-31 M 6.83 35372506 46.17 East 22.9868 87.855 7 Jul Jul
264 West Bengal 2020-08-31 M 14.87 33298644 47.48 East 22.9868 87.855 8 Aug Aug
265 West Bengal 2020-09-30 M 9.35 35707239 47.73 East 22.9868 87.855 9 Sep Sep
266 West Bengal 2020-10-31 M 9.98 33962549 45.63 East 22.9868 87.855 10 Oct Oct

📊 Exploratory Data Analysis

In [63]:
#Basic Statistics
data_stats = data[['estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']]
round(data_stats.describe().T, 2)
Out[63]:
count mean std min 25% 50% 75% max
estimated unemployment rate 267.0 12.24 10.80 0.50 4.84 9.65 16.76 75.85
estimated employed 267.0 13962105.72 13366318.36 117542.00 2838930.50 9732417.00 21878686.00 59433759.00
estimated labour participation rate 267.0 41.68 7.85 16.77 37.26 40.39 44.06 69.69
In [64]:
region_stats = data.groupby(['region'])[['estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']].mean().reset_index()
round(region_stats, 2)
Out[64]:
region estimated unemployment rate estimated employed estimated labour participation rate
0 East 13.92 19602366.90 40.11
1 North 15.89 13072487.92 38.70
2 Northeast 10.95 3617105.53 52.06
3 South 10.45 14040589.33 40.44
4 West 8.24 18623512.72 41.26

🧠📊 Data Visualizations

🗺️📍State Wise Analysis in Every Month

In [65]:
IMD = data.groupby(["Month"])[['estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']].mean()
IMD = pd.DataFrame(IMD).reset_index()
In [66]:
# Step 1: Import required libraries
# import plotly.graph_objects as go
import plotly.io as pio
#pio.kaleido.scope.default_format = "png"

# Step 2: Set the renderer (choose based on your environment)
pio.renderers.default = 'notebook'  # Use 'browser' if you're not in Jupyter

# Step 3: Prepare the data (make sure your DataFrame 'IMD' is already defined)
# Example column names assumed: 'Month', 'estimated unemployment rate (%)', 'estimated labour participation rate (%)'

# Check column names to avoid types
print(IMD.columns)

# Step 4: Extract data
month = IMD["Month"]
unemployment_rate = IMD["estimated unemployment rate"]
labour_participation_rate = IMD["estimated labour participation rate"]

# Step 5: Create the figure
fig = go.Figure()

fig.add_trace(go.Bar(
    x=month,
    y=unemployment_rate,
    name="Unemployment Rate",
    marker_color='indianred'
))

fig.add_trace(go.Bar(
    x=month,
    y=labour_participation_rate,
    name="Labour Participation Rate",
    marker_color='lightskyblue'
))

# Step 6: Customize the layout
fig.update_layout(
    title="Unemployment Rate vs Labour Participation Rate",
    xaxis_title="Month",
    yaxis_title="Percentage",
    barmode='group',  # side-by-side bars
    template='plotly_white'
)

# Step 7: Show the chart
fig.show()
Index(['Month', 'estimated unemployment rate', 'estimated employed',
       'estimated labour participation rate'],
      dtype='object')
In [67]:
#!pip install -U kaleido
In [68]:
print(IMD.head())
  Month  estimated unemployment rate  estimated employed  \
0   Apr                    22.236154        1.057020e+07   
1   Aug                    10.313333        1.442904e+07   
2   Feb                     9.266154        1.548827e+07   
3   Jan                     9.196538        1.563720e+07   
4   Jul                     9.834444        1.441802e+07   

   estimated labour participation rate  
0                            35.297308  
1                            42.390741  
2                            44.180769  
3                            44.626538  
4                            42.274815  

📊📈Bar Plot of Estimated Employed Citizens in Every Month

In [69]:
import plotly.express as px
In [70]:
fig = px.bar(IMD, x='Month', y='estimated employed', color='Month',
            category_orders = {"Month":["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct"]},
            title = 'Estimated employed people from Jan 2020 to Oct 2020')

fig.show()

Significant Impact of the Nationwide Lockdown:

The data clearly shows a sharp and widespread increase in the estimated unemployment rate across all regions of India during April and May 2020.

This directly correlates with the implementation of the nationwide lockdown to curb the spread of COVID-19. The unemployment rate peaked during these months, demonstrating the immediate and severe disruption to economic activities and job markets.

📍📊Regional Analysis

In [71]:
State = data.groupby("region")[['estimated unemployment rate','estimated employed','estimated labour participation rate']].mean()
State = pd.DataFrame(State).reset_index()
In [72]:
#box Plot

fig = px.box(data,x='state',y='estimated unemployment rate',color='state',title='Unemployment rate')
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

Significant Impact of the Nationwide Lockdown:

The data clearly shows a sharp and widespread increase in the estimated unemployment rate across all regions of India during April and May 2020. This directly correlates with the implementation of the nationwide lockdown to curb the spread of COVID-19. The unemployment rate peaked during these months, demonstrating the immediate and severe disruption to economic activities and job markets. Varied Regional Impact and Recovery:

While all regions experienced a surge in unemployment during the lockdown, there were noticeable differences in the peak rates and recovery trajectories. Regions like 'Urban' and 'North' generally showed higher unemployment rates during the peak lockdown period compared to others, but also experienced a more pronounced recovery in the subsequent months. The average unemployment rate also varied significantly across regions, with some regions consistently showing higher overall unemployment than others throughout the period. State-Level Disparities in Lockdown Impact:

The lockdown's impact on unemployment was not uniform across states. Some states experienced a much more drastic surge in unemployment than others. Most Affected States: Puducherry, Jharkhand, Tamil Nadu, Bihar, and Karnataka recorded the largest increases in unemployment rates when comparing the lockdown period (April-June 2020) to the pre-lockdown period (January-March 2020). This suggests these states' economies or labor markets were particularly vulnerable to the lockdown measures. States with Higher Overall Average Unemployment: Over the entire period, states like Haryana, Tripura, and Jammu & Kashmir consistently had higher average estimated unemployment rates. States with Lower Overall Average Unemployment: Conversely, states such as Meghalaya, Sikkim, and Chhattisgarh maintained relatively lower average estimated unemployment rates, indicating a more stable employment scenario or different economic structures.

⚖️Average Unemployment Rate Bar Plot

In [73]:
# Grouping data to get the average unemployment rate by state
state_avg = data.groupby('state')['estimated unemployment rate'].mean().reset_index()

# Rename columns for simplicity
state_avg.columns = ['state', 'avg_unemployment_rate']
fig = px.bar(
    state_avg,
    x='state',
    y='avg_unemployment_rate',
    color="state",
    title="Average Unemployment Rate by State"
)
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

Haryana and Tripura was having the highest average amount of Unemployment Rate

Meghalaya was having the lowest average amount of Unemployment Rate

🔥📈Correlation Heatmap

In [74]:
heat_maps = data[["estimated unemployment rate", "estimated employed","estimated labour participation rate",'longitude','latitude','Month_int']]
heat_maps = heat_maps.corr()
plt.figure(figsize=(10,5))
sns.set_context("notebook",font_scale=1)
sns.heatmap(heat_maps,annot=True , cmap="coolwarm")
Out[74]:
<Axes: >
No description has been provided for this image

🌠🧩Scatter Matrix

In [75]:
fig = px.scatter_matrix(data, template='plotly',
                        dimensions=['estimated unemployment rate', 'estimated employed', 'estimated labour participation rate'],
                        color='region')
fig.show()
fig.write_image('unemployment_plot.png')

🔍 🤔Animated bar plot of Unemployment rate across region from Jan.2020 to Oct.2020

In [76]:
fig = px.bar(data, x='region', y='estimated unemployment rate', animation_frame='Month_name', color='state',
             title='Unemployment rate across region from Jan.2020 to Oct.2020', height=700, template='plotly')
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000
fig.show()

☀️🌀Sunburst chart

In [77]:
unemplo_df = data[['state', 'region', 'estimated unemployment rate', 'estimated employed', 'estimated labour participation rate']]
unemplo = unemplo_df.groupby(['region', 'state'])['estimated unemployment rate'].mean().reset_index()
fig = px.sunburst(unemplo, path=['region', 'state'], values='estimated unemployment rate',
                  color_continuous_scale='Plasma', title='Unemployment rate in each region and state',
                  height=650, template='ggplot2')
fig.show()

🚧🚀Monthly Unemployment Rate

In [78]:
#Impact of Lockdown on States Estimated Employed

fig = px.scatter_geo(data,'longitude', 'latitude', color="region",
                     hover_name="state", size="estimated unemployment rate",
                     animation_frame="Month_name",scope='asia',template='seaborn',title='Impack of lockdown on Employement across regions')

fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 2000

fig.update_geos(lataxis_range=[5,35], lonaxis_range=[65, 100],oceancolor="lightblue",
    showocean=True)

fig.show()
In [79]:
data.region.unique()
Out[79]:
['South', 'Northeast', 'East', 'West', 'North']
Categories (5, object): ['East', 'North', 'Northeast', 'South', 'West']

📉😔Unemployment Rate Before and After Lockdown

In [80]:
#data representation before and after the lockdown

before_lockdown = data[(data['Month_int']>=1) & (data['Month_int']<4)]
after_lockdown =  data[(data['Month_int']>=4) & (data['Month_int']<=6)]
In [81]:
af_lockdown=after_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()
lockdown= before_lockdown.groupby('state')['estimated unemployment rate'].mean().reset_index()
lockdown['Unemployment Rate before lockdown'] = af_lockdown['estimated unemployment rate']

lockdown.columns=['state','Unemployment Rate Before Lockdown','Unemployment Rate After Lockdown']

lockdown.head()
Out[81]:
state Unemployment Rate Before Lockdown Unemployment Rate After Lockdown
0 Andhra Pradesh 5.700000 13.750000
1 Assam 4.613333 7.070000
2 Bihar 12.110000 36.806667
3 Chhattisgarh 8.523333 9.380000
4 Delhi 18.036667 25.713333
In [82]:
# percentage change in unemployment rate

lockdown['rate change in unemployment'] = round(lockdown['Unemployment Rate After Lockdown'] -lockdown['Unemployment Rate Before Lockdown']/lockdown['Unemployment Rate Before Lockdown'],2)
plot_per = lockdown.sort_values('rate change in unemployment')
In [83]:
# percentage change in unemployment after lockdown

fig = px.bar(plot_per, x='state',y='rate change in unemployment',color='state',
            title='percentage change in Unemployment in each state after lockdown',template='ggplot2')
fig.show()

Most Impacted States/Union Territories

  1. Puducherry

  2. Jharkhand

  3. Bihar

  4. Haryana

  5. Tripura

🔒🚫Impact of Lockdown on Employment Across States

In [84]:
# function to sort value based on impact

def sort_impact(x):
    if x <= 10:
        return 'impacted States'
    elif x <= 20:
        return 'hard impacted States'
    elif x <= 30:
        return 'harder impacted States'
    elif x <= 46:
        return 'hardest impacted States'
    return x
In [85]:
plot_per['impact status'] = plot_per['rate change in unemployment'].apply(lambda x:sort_impact(x))
In [86]:
fig = px.bar(plot_per, y='state',x='rate change in unemployment',color='impact status',
            title='Impact of lockdown on employment across states',template='ggplot2',height=650)


fig.show()

✨FUTURE RECOMMENDATIONS ✨

Here's a brief summary of the unemployment analysis in India from January to October 2020 and future recommendations:

Key Insights Significant Lockdown Impact: Unemployment rates sharply increased across all regions, peaking in April-May 2020 due to the nationwide lockdown.

Varied Regional and State Impact: While all areas were affected, some regions (e.g., Urban, North) and states (e.g., Puducherry, Jharkhand, Tamil Nadu, Bihar, Karnataka) experienced a more severe surge and varied recovery trajectories. States like Haryana and Tripura showed consistently higher overall unemployment, while Meghalaya and Sikkim had lower rates.

Gradual Recovery: Post-lockdown, unemployment rates gradually declined, but many areas had not returned to pre-lockdown levels by October 2020. Future Recommendations

Targeted Programs: Implement state-specific employment and re-skilling programs, focusing on the most affected states and sectors.

Business Support: Provide financial and logistical aid to vulnerable MSMEs and labor-intensive industries.

Social Safety Nets: Enhance unemployment benefits and income support programs.

Regional/Rural Focus: Prioritize rural development and non-agricultural job creation to balance growth.

Data-Driven Policies: Continuously analyze data for agile policy responses.

Promote Entrepreneurship: Support entrepreneurship with easier credit and mentorship.

In [ ]: